%run set_theme.ipynb
import pandas as pd
import plotly.express as px
from plotly.offline import init_notebook_mode
init_notebook_mode()
# Load the survey data set from the parquet file next to the notebooks.
survey_path = '../data/SO_2014_2022.pq'
df = pd.read_parquet(survey_path)

# Keep only rows whose salary lies strictly between 0 and 250000; rows with a
# missing salary fail both comparisons and are dropped as well.
has_positive_salary = df['Salary'] > 0
below_salary_cap = df['Salary'] < 250000
df = df[has_positive_salary & below_salary_cap]

# Preview the first rows of the filtered frame.
df.head()
| Year | Salary | JobSat | YearsCode | YearsCodePro | Age | Education | OrgSize | LastNewJob | Employment | RespondentType | JobSeek | Gender | Student | Country | CodingActivities | DevType | LearnCodeFrom | LangPresent | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2022 | 69318.0 | <NA> | 10 | 5 | 25-34 | master | 500 to 999 employees | <NA> | fulltime | dev | <NA> | male | no | Germany | School or academic work | Data scientist or machine learning specialist;... | Books / Physical media;School (i.e., Universit... | C;C++;Java;JavaScript;MATLAB;Python;Scala;SQL;... |
| 6 | 2022 | 27652.0 | <NA> | 18 | 10 | 25-34 | bachelor | 1,000 to 4,999 employees | <NA> | fulltime | dev | <NA> | male | no | Colombia | Hobby | Developer, full-stack;Developer, back-end | Books / Physical media;Other online resources ... | Bash/Shell/PowerShell;Elixir;HTML/CSS;JavaScri... |
| 9 | 2022 | 15431.0 | <NA> | 5 | 5 | 25-34 | bachelor | 20 to 99 employees | <NA> | fulltime | dev | <NA> | male | no | Ghana | Freelance/contract work | Developer, back-end | On the job training;Coding Bootcamp | JavaScript;Ruby |
| 13 | 2022 | 47352.0 | <NA> | 7 | 7 | 45-54 | master | 10 to 19 employees | <NA> | fulltime | non-dev | <NA> | male | no | Belgium | Hobby | Developer, back-end;Educator or academic;Datab... | Books / Physical media;On the job training;Col... | Delphi;SQL |
| 22 | 2022 | 78084.0 | <NA> | 25 | 25 | 45-54 | bachelor | 500 to 999 employees | <NA> | fulltime | non-dev | <NA> | male | no | Canada | Hobby;Contribute to open-source projects | Engineer, site reliability;Security professional | Books / Physical media;Other online resources ... | Bash/Shell/PowerShell;C;JavaScript;Perl;PHP;Py... |
# Calculate the mean salary for each age bin, separately for male and female
# respondents, and stack both results into one frame used for plotting.
def _mean_salary_by_age(frame, gender):
    """Return the mean salary per age bin for a single gender.

    Args:
        frame: Survey data with 'Gender', 'Age' and 'Salary' columns.
        gender: Value of the 'Gender' column to select (e.g. 'male').

    Returns:
        A DataFrame with the columns 'Gender', 'Age' and 'Salary', where
        'Salary' holds the mean salary of the selected rows in that age bin.
    """
    return (
        frame.query('Gender == @gender')
        # observed=True is passed explicitly: pandas warns that the implicit
        # observed=False default for categorical group keys is deprecated.
        # It also keeps the result limited to Gender/Age combinations that
        # actually occur, instead of adding NaN placeholder rows for every
        # unobserved category combination.
        .groupby(['Gender', 'Age'], observed=True)
        .agg({'Salary': 'mean'})
        .reset_index()
    )


male_age_salary_df = _mean_salary_by_age(df, 'male')
female_age_salary_df = _mean_salary_by_age(df, 'female')

# Male rows first, then female rows (same order as before).
age_salary_df = pd.concat([male_age_salary_df, female_age_salary_df])
# Horizontal grouped bar chart: one bar per age bin and gender, with the bar
# length showing the mean salary.
gender_colors = {
    'male': '#5b6fec',
    'female': '#f854ee'
}
fig = px.bar(
    age_salary_df,
    x='Salary',
    y='Age',
    color='Gender',
    color_discrete_map=gender_colors,
    barmode='group',
    orientation='h',
    title='Mean Salary<br><sup>Salary increases as people are getting older</sup>',
    width=790,
)


def _style_hover(trace):
    """Give a trace a hover box headed by its capitalized name."""
    hover_text = ('<b>' + trace.name.capitalize()
                  + '</b><br>Average salary: %{x:d}<extra></extra>')
    return trace.update(
        hovertemplate=hover_text,
        hoverlabel=dict(font_color='white', bordercolor='white'),
    )


fig.for_each_trace(_style_hover)

# Explicit margins around the plot; the caption below is positioned in the
# margin area via negative paper coordinates.
fig.update_layout(margin=dict(l=110, b=120, r=130, t=100))

# Caption anchored by its bottom-left corner, left and below the plot area.
caption = ('Mean salary per age segment for each of the two genders.<br>'
           'Hover over any bar to view the average salary for a specific age segment.')
fig.add_annotation(
    text=caption,
    xref="paper",
    yref="paper",
    x=-0.13,
    y=-0.32,
    xanchor='left',
    yanchor='bottom',
    align='left',
    showarrow=False,
)

fig.show()
C:\Users\Efe\AppData\Local\Temp\ipykernel_19156\3702917771.py:5: FutureWarning:
The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.
C:\Users\Efe\AppData\Local\Temp\ipykernel_19156\3702917771.py:10: FutureWarning:
The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.